#encoding=utf-8
import sqlite3
import requests
import re
from bs4 import BeautifulSoup
baidu_urls=["http://www.baidu.com/s?ie=utf-8&mod=1&isbd=1&isid=1DDD893A1C916352&wd=powered%20by%20discuz&pn={}0&oq=powered%20by%20discuz&tn=93728684_hao_pg&ie=utf-8&f=3&usm=1&rsv_idx=2&rsv_pq=ed6f784e000aab94&rsv_t=ff17dsjdoZk6%2B%2BUXUFojkWc1KNhjEIv7ALNfmr0T5C%2Fiv3Eq%2B4TwSpoaK2tfWOs0VLIiNiUE&bs=powered%20by%20discuz&rsv_sid=undefined&_ss=1&clist=&hsug=&f4s=1&csor=17&_cr1=28000".format(str(i)) for i in range(0,76)]
headers={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
}
def re_index(target_url):
    """Extract every scheme-plus-host prefix (e.g. "http://host/") from a URL.

    Parameters:
        target_url (str): URL (or text containing URLs) to inspect.

    Returns:
        list: all "http://host/" or "https://host/" prefixes found, in order;
        empty list when the input contains none.
    """
    # BUGFIX: the original pattern matched only "http://", but requests
    # follows redirects, so the final URL handed to us by the caller is often
    # "https://..."; that produced an empty list and an IndexError downstream.
    # "https?" accepts both schemes and remains backward compatible.
    url_re = re.compile(r"(https?://.*?/)")
    return url_re.findall(target_url)

def getPages(baidu_url):
    """Fetch one Baidu result page and probe each hit for a Discuz forum.

    For every search-result link: follow the Baidu redirect, reduce the final
    URL to its "scheme://host/" root, request "<root>forum.php", and on HTTP
    200 print the page title, root URL and meta description.

    Parameters:
        baidu_url (str): one fully-formed Baidu search-result page URL.

    Returns:
        None. Results are printed; per-link failures are logged and skipped.
    """
    response = requests.get(baidu_url, headers=headers, timeout=8)
    response_soup = BeautifulSoup(response.text, "html.parser")
    hrefs = response_soup.select("div.result > h3 > a")
    for href in hrefs:
        try:
            # Follow Baidu's redirect to discover the real site URL.
            # BUGFIX: send the same UA headers as the initial request.
            target_site = requests.get(href.get("href"), headers=headers, timeout=8)
            target_roots = re_index(target_site.url)
            if not target_roots:
                # Final URL had no recognizable scheme://host/ prefix;
                # previously this raised IndexError into the broad except.
                continue
            root = target_roots[0]
            target_page = requests.get(root + "forum.php", headers=headers, timeout=8)
            if target_page.status_code != 200:
                continue
            page_soup = BeautifulSoup(target_page.text, "html.parser")
            # BUGFIX: tolerate pages missing <title> or the description meta
            # tag instead of crashing on None.
            title_tag = page_soup.select_one("title")
            title = title_tag.get_text() if title_tag is not None else ""
            desc_tag = page_soup.select_one("head > meta[name=description]")
            description = desc_tag.get('content', '') if desc_tag is not None else ""
            # BUGFIX: Py2-only print statements replaced with Py2.6+/Py3
            # compatible single-string print calls.
            print("title: %s url: %s description: %s" % (title, root, description))
        except Exception as e:
            # Best-effort crawler: log the failure and move to the next hit.
            print("%s: %s" % (type(e).__name__, e))

# Driver: crawl every prepared Baidu results page in sequence.
for search_url in baidu_urls:
    getPages(search_url)
